In [9]:
import os
from graph_tool.all import *
import codecs
from collections import defaultdict, OrderedDict
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline 
from igraph import *
from scipy.stats.stats import pearsonr
from scipy import stats

In [10]:
# Data directory; a relative path, so it is resolved against whatever the
# current working directory is when this cell runs.
WORKING_FOLDER = '../../../DATA/FIN/'
# NOTE(review): every file opened by bare name in later cells depends on this
# chdir. Because the path is relative, re-running this cell in the same kernel
# resolves it against the already-changed directory — restart the kernel
# before re-running.
os.chdir(WORKING_FOLDER)
# Name of the mention-graph input file (not referenced in the cells visible here).
f_in_graph = 'mention_graph_weights.dat'
# this is a .tree file
# presumably the Infomap community output, going by the variable name — TODO confirm
f_in_Infomap_output_comm = 'dir_weighted.tree'

Semantic capital


In [11]:
def read_sem_capital(f_name='user_entities.tab', tname='entities'):
    """Load one numeric value per id from a tab-separated text file.

    Parameters
    ----------
    f_name : str
        Path of the file to read (relative paths are resolved against the
        current working directory).
    tname : str
        Layout selector. ``'sentiment'`` means each line has three
        tab-separated columns (id, a middle column that is ignored, value);
        any other value means two columns (id, value).

    Returns
    -------
    collections.defaultdict
        Maps id (str) to value (float). If an id occurs on several lines the
        last one wins. Being a ``defaultdict(int)``, looking up an unknown id
        yields 0 and inserts that id.

    Raises
    ------
    ValueError
        If a non-blank line has the wrong number of columns for ``tname`` or
        its value column is not a valid float.
    """
    cap = defaultdict(int)
    # Context manager so the handle is closed even if a line fails to parse.
    with open(f_name, "r") as f:
        for line in f:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if not line.strip():
                continue
            fields = line.split('\t')
            if tname == 'sentiment':
                (vid, vn, val) = fields
            else:
                (vid, val) = fields
            # float() ignores the trailing newline left on the last column.
            cap[vid] = float(val)
    return cap

In [13]:
def find_avg_SEM_cap_in_the_network():
    """Summarise the values loaded by ``read_sem_capital()`` with its defaults.

    Returns
    -------
    tuple
        ``(mean, standard deviation)`` over all loaded values. ``np.std`` is
        called with its defaults, i.e. the population standard deviation.
    """
    sem_cap = read_sem_capital()

    # list(...) so numpy receives a real sequence: on Python 3 dict.values()
    # is a view that np.array would wrap as a single object instead of a
    # numeric array. Build the array once and reuse it for both statistics.
    values = np.array(list(sem_cap.values()))

    return np.mean(values), np.std(values)

find_avg_SEM_cap_in_the_network()


Out[13]:
(10.719749828278081, 8.0371401642030165)

Status inconsistency


In [14]:
def find_avg_ST_INC_in_the_network():
    """Summarise the values stored in the ``status_inconsistency`` file.

    The file is read through ``read_sem_capital`` with
    ``tname='status_inconsistency'``, which selects that reader's two-column
    (id, value) layout.

    Returns
    -------
    tuple
        ``(mean, standard deviation)`` over all loaded values. ``np.std`` is
        called with its defaults, i.e. the population standard deviation.
    """
    st_inc = read_sem_capital(f_name='status_inconsistency', tname='status_inconsistency')

    # list(...) so numpy receives a real sequence: on Python 3 dict.values()
    # is a view that np.array would wrap as a single object instead of a
    # numeric array. Build the array once and reuse it for both statistics.
    values = np.array(list(st_inc.values()))

    return np.mean(values), np.std(values)

find_avg_ST_INC_in_the_network()


Out[14]:
(-0.10322751746229998, 0.62554694747423045)

Semantic relatedness


In [18]:
def find_avg_SR_in_the_undir_network(fn='directed_mention_graph_with_SR_NCOL_edgelist'):
    """Compute mean and standard deviation of the third column of an edge list.

    Each non-blank line is expected to hold three whitespace-separated
    fields: two node identifiers and a weight. The weight may use a decimal
    comma, which is converted to a decimal point before parsing.

    Parameters
    ----------
    fn : str
        Path of the edge-list file. Defaults to the file this function has
        always read.
        NOTE(review): the function name says "undir" but the default is the
        *directed* edge list — confirm which file is intended.

    Returns
    -------
    tuple
        ``(mean, standard deviation)`` of the weights. ``np.std`` is called
        with its defaults, i.e. the population standard deviation.

    Raises
    ------
    ValueError
        If a non-blank line does not have exactly three fields or its weight
        is not a valid number.
    """
    weights = []
    # Context manager so the handle is closed even if a line fails to parse.
    with open(fn, 'r') as f:
        for line in f:
            fields = line.split()
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if not fields:
                continue
            (u1, u2, SR) = fields
            weights.append(float(SR.replace(',', '.')))

    weights = np.array(weights)
    return np.mean(weights), np.std(weights)

find_avg_SR_in_the_undir_network()


Out[18]:
(0.23445552024599237, 0.295632719144103)

In [25]:
def SR_graphs_stats():
    """Print a summary of the directed and undirected SR edge-list graphs.

    Both files are loaded with ``Graph.Read_Ncol`` (NCOL edge-list format),
    keeping vertex names and edge weights. The first call does not pass
    ``directed``, so the library default applies; the second passes
    ``directed=False``.

    NOTE(review): ``Graph`` is pulled in by wildcard imports from both
    ``graph_tool.all`` and ``igraph``; the later ``igraph`` import presumably
    wins, which is what ``Read_Ncol`` requires — confirm.

    Returns
    -------
    None
        Output is written to stdout only.
    """
    # Single-argument print(...) emits the same text under the Python 2
    # print statement and the Python 3 print function.
    print('DIR')
    directed_graph = Graph.Read_Ncol('directed_mention_graph_with_SR_NCOL_edgelist', weights=True, names=True)
    print(directed_graph.summary())

    print('UNDIR')
    undirected_graph = Graph.Read_Ncol('undirected_mention_graph_with_SR_NCOL_edgelist', directed=False, weights=True, names=True)
    print(undirected_graph.summary())
    
SR_graphs_stats()


DIR
IGRAPH DNW- 26717 99910 -- 
+ attr: name (v), weight (e)
UNDIR
IGRAPH UNW- 22512 29950 -- 
+ attr: name (v), weight (e)

In [ ]:


In [ ]: